run cellranger with Spp1-EGFP added reference index
three models: EAE, AD, SIMPLE
each has one GFP(Spp1+), MIG(total microglia), CTL(WT)
AD.GFP.2 is from homozygous model while others are all heterozygous, so
need to be excluded before final clustering
loading 65k cells, CX3CR1+
demo, cellranger called 39,425 cells
plus, cellranger called 43,176 cells
filt.10x <- Read10X(data.dir = "../output_plus_exogene/filtered_feature_bc_matrix/")
## 10X data contains more than one type and is being returned as a list containing matrices of each type.
GEX <- filt.10x$`Gene Expression`
FB <- filt.10x$`Antibody Capture`
dim(GEX)
## [1] 32286 43176
GEX[1:6,1:6]
## 6 x 6 sparse Matrix of class "dgCMatrix"
## AAACCCAAGAAGGATG-1 AAACCCAAGAAGTCCG-1 AAACCCAAGAGAAGGT-1
## Xkr4 . . .
## Gm1992 . . .
## Gm19938 . . .
## Gm37381 . . .
## Rp1 . . .
## Sox17 . . .
## AAACCCAAGCACTCAT-1 AAACCCAAGTATGTAG-1 AAACCCAAGTCATCCA-1
## Xkr4 . . .
## Gm1992 . . .
## Gm19938 . . .
## Gm37381 . . .
## Rp1 . . .
## Sox17 . . .
dim(FB)
## [1] 10 43176
FB[,1:6]
## 10 x 6 sparse Matrix of class "dgCMatrix"
## AAACCCAAGAAGGATG-1 AAACCCAAGAAGTCCG-1 AAACCCAAGAGAAGGT-1
## EAE.GFP 15 19 14
## EAE.MIG 24 11 13
## EAE.CTL 303 6 13
## AD.GFP.1 6 7 229
## AD.GFP.2 5 1 3
## AD.MIG 11 7 7
## AD.CTL 2 3 2
## SIM.GFP 10 2 6
## SIM.MIG 8 64 3
## SIM.CTL 8 3 6
## AAACCCAAGCACTCAT-1 AAACCCAAGTATGTAG-1 AAACCCAAGTCATCCA-1
## EAE.GFP 6 197 25
## EAE.MIG 15 15 31
## EAE.CTL 4 184 25
## AD.GFP.1 125 4 20
## AD.GFP.2 2 5 12
## AD.MIG 9 9 11
## AD.CTL 1 1 6
## SIM.GFP 5 10 8
## SIM.MIG 6 88 12
## SIM.CTL 3 3 138
rowSums(FB)
## EAE.GFP EAE.MIG EAE.CTL AD.GFP.1 AD.GFP.2 AD.MIG AD.CTL SIM.GFP
## 2540456 4085857 1197867 1367832 618839 1171724 371447 832225
## SIM.MIG SIM.CTL
## 907463 482995
rowSums(FB>0)
## EAE.GFP EAE.MIG EAE.CTL AD.GFP.1 AD.GFP.2 AD.MIG AD.CTL SIM.GFP
## 43132 43138 43045 42173 38869 42474 35633 42657
## SIM.MIG SIM.CTL
## 41787 40281
# build seurat object
FB.seur <- CreateSeuratObject(counts = FB,project = "Spp1ND")
FB.seur
## An object of class Seurat
## 10 features across 43176 samples within 1 assay
## Active assay: RNA (10 features, 0 variable features)
rownames(FB.seur)
## [1] "EAE.GFP" "EAE.MIG" "EAE.CTL" "AD.GFP.1" "AD.GFP.2" "AD.MIG"
## [7] "AD.CTL" "SIM.GFP" "SIM.MIG" "SIM.CTL"
perform Seurat ’Demultiplexing with hashtag oligos(HTOs)
ref https://satijalab.org/seurat/articles/hashing_vignette.html
# normalize
FB.seur <- NormalizeData(FB.seur)
FB.seur <- FindVariableFeatures(FB.seur, selection.method = "mean.var.plot")
FB.seur <- ScaleData(FB.seur, features = VariableFeatures(FB.seur))
## Centering and scaling data matrix
# independent assay for HTO data
FB.seur[['HTO']] <- CreateAssayObject(counts = FB)
FB.seur <- NormalizeData(FB.seur, assay = 'HTO', normalization.method = 'CLR')
## Normalizing across features
first define FB colors based on conditions
scales::show_col(ggsci::pal_igv("default")(49))
color.FB <- ggsci::pal_igv("default")(49)[c(8,33,40,
34,26,1,28,
2,43,18)]
#2,13,33,1,15,
#34,26,28,40,18
level.FB <- c(rownames(FB.seur))
color.FBraw <- c("#FF6C91","lightgrey",color.FB)
scales::show_col(color.FBraw, ncol = 5)
scales::show_col(color.FB, ncol = 5)
par(mfrow=c(2,5))
#plot(sort(rowSums(t(FB))),
# xlab="reordered cells",
# ylab="UMI counts", log="xy",
# main="sum")
plot(sort(t(FB)[,1]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[1])
plot(sort(t(FB)[,2]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[2])
plot(sort(t(FB)[,3]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[3])
plot(sort(t(FB)[,4]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[4])
plot(sort(t(FB)[,5]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[5])
plot(sort(t(FB)[,6]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[6])
plot(sort(t(FB)[,7]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[7])
plot(sort(t(FB)[,8]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[8])
plot(sort(t(FB)[,9]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[9])
plot(sort(t(FB)[,10]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[10])
#color.FB <- scales::hue_pal()(10)
par(mfrow=c(3,5))
plot(table.demux$Doublet, type="p", col="#FF6C91", pch=16, ylab="Doublet")
plot(table.demux$Negative, type="p", col="lightgrey", pch=16, ylab="Negative")
plot(rowSums(table.demux[,grep("EAE|AD|SIM",colnames(table.demux))]), type="p", col="#306000", pch=16, ylab="Singlet")
plot(table.demux$quantile, pch=16, ylab="mod-quantile")
plot.new()
plot(table.demux[,2+1], type="p", col=color.FB[1], pch=16, ylab=colnames(table.demux)[2+1])
plot(table.demux[,2+2], type="p", col=color.FB[2], pch=16, ylab=colnames(table.demux)[2+2])
plot(table.demux[,2+3], type="p", col=color.FB[3], pch=16, ylab=colnames(table.demux)[2+3])
plot(table.demux[,2+4], type="p", col=color.FB[4], pch=16, ylab=colnames(table.demux)[2+4])
plot(table.demux[,2+5], type="p", col=color.FB[5], pch=16, ylab=colnames(table.demux)[2+5])
plot(table.demux[,2+6], type="p", col=color.FB[6], pch=16, ylab=colnames(table.demux)[2+6])
plot(table.demux[,2+7], type="p", col=color.FB[7], pch=16, ylab=colnames(table.demux)[2+7])
plot(table.demux[,2+8], type="p", col=color.FB[8], pch=16, ylab=colnames(table.demux)[2+8])
plot(table.demux[,2+9], type="p", col=color.FB[9], pch=16, ylab=colnames(table.demux)[2+9])
plot(table.demux[,2+10], type="p", col=color.FB[10], pch=16, ylab=colnames(table.demux)[2+10])
#color.FB <- scales::hue_pal()(10)
par(mfrow=c(3,5))
plot(table.demux.s$Doublet, type="p", col="#FF6C91", pch=16, ylab="Doublet")
plot(table.demux.s$Negative, type="p", col="lightgrey", pch=16, ylab="Negative")
plot(rowSums(table.demux.s[,grep("EAE|AD|SIM",colnames(table.demux.s))]), type="p", col="#306000", pch=16, ylab="Singlet")
plot(table.demux.s$quantile, pch=16, ylab="mod-quantile")
plot.new()
plot(table.demux.s[,2+1], type="p", col=color.FB[1], pch=16, ylab=colnames(table.demux.s)[2+1])
plot(table.demux.s[,2+2], type="p", col=color.FB[2], pch=16, ylab=colnames(table.demux.s)[2+2])
plot(table.demux.s[,2+3], type="p", col=color.FB[3], pch=16, ylab=colnames(table.demux.s)[2+3])
plot(table.demux.s[,2+4], type="p", col=color.FB[4], pch=16, ylab=colnames(table.demux.s)[2+4])
plot(table.demux.s[,2+5], type="p", col=color.FB[5], pch=16, ylab=colnames(table.demux.s)[2+5])
plot(table.demux.s[,2+6], type="p", col=color.FB[6], pch=16, ylab=colnames(table.demux.s)[2+6])
plot(table.demux.s[,2+7], type="p", col=color.FB[7], pch=16, ylab=colnames(table.demux.s)[2+7])
plot(table.demux.s[,2+8], type="p", col=color.FB[8], pch=16, ylab=colnames(table.demux.s)[2+8])
plot(table.demux.s[,2+9], type="p", col=color.FB[9], pch=16, ylab=colnames(table.demux.s)[2+9])
plot(table.demux.s[,2+10], type="p", col=color.FB[10], pch=16, ylab=colnames(table.demux.s)[2+10])
(tags1-6/8/9 set q0.998, tags7/10 q0.99)
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.994)
## Cutoff for EAE.GFP : 40 reads
## Cutoff for EAE.MIG : 55 reads
## Cutoff for EAE.CTL : 32 reads
## Cutoff for AD.GFP.1 : 19 reads
## Cutoff for AD.GFP.2 : 13 reads
## Cutoff for AD.MIG : 22 reads
## Cutoff for AD.CTL : 14 reads
## Cutoff for SIM.GFP : 26 reads
## Cutoff for SIM.MIG : 23 reads
## Cutoff for SIM.CTL : 22 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 15237 1192 26747
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative EAE.GFP EAE.MIG EAE.CTL AD.GFP.1 AD.GFP.2 AD.MIG
## 15237 1192 3042 3083 3014 3003 1828 2862
## AD.CTL SIM.GFP SIM.MIG SIM.CTL
## 2596 1579 3013 2727
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.996)
## Cutoff for EAE.GFP : 42 reads
## Cutoff for EAE.MIG : 59 reads
## Cutoff for EAE.CTL : 34 reads
## Cutoff for AD.GFP.1 : 20 reads
## Cutoff for AD.GFP.2 : 13 reads
## Cutoff for AD.MIG : 23 reads
## Cutoff for AD.CTL : 15 reads
## Cutoff for SIM.GFP : 27 reads
## Cutoff for SIM.MIG : 25 reads
## Cutoff for SIM.CTL : 24 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 14885 1325 26966
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative EAE.GFP EAE.MIG EAE.CTL AD.GFP.1 AD.GFP.2 AD.MIG
## 14885 1325 3089 3121 3035 3039 1855 2891
## AD.CTL SIM.GFP SIM.MIG SIM.CTL
## 2618 1596 3023 2699
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.998)
## Cutoff for EAE.GFP : 46 reads
## Cutoff for EAE.MIG : 64 reads
## Cutoff for EAE.CTL : 38 reads
## Cutoff for AD.GFP.1 : 22 reads
## Cutoff for AD.GFP.2 : 15 reads
## Cutoff for AD.MIG : 26 reads
## Cutoff for AD.CTL : 17 reads
## Cutoff for SIM.GFP : 30 reads
## Cutoff for SIM.MIG : 27 reads
## Cutoff for SIM.CTL : 26 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 14340 1542 27294
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative EAE.GFP EAE.MIG EAE.CTL AD.GFP.1 AD.GFP.2 AD.MIG
## 14340 1542 3166 3196 3062 3099 1874 2935
## AD.CTL SIM.GFP SIM.MIG SIM.CTL
## 2626 1610 3057 2669
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.99)
## Cutoff for EAE.GFP : 37 reads
## Cutoff for EAE.MIG : 51 reads
## Cutoff for EAE.CTL : 30 reads
## Cutoff for AD.GFP.1 : 17 reads
## Cutoff for AD.GFP.2 : 12 reads
## Cutoff for AD.MIG : 20 reads
## Cutoff for AD.CTL : 13 reads
## Cutoff for SIM.GFP : 24 reads
## Cutoff for SIM.MIG : 21 reads
## Cutoff for SIM.CTL : 20 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 15819 1083 26274
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative EAE.GFP EAE.MIG EAE.CTL AD.GFP.1 AD.GFP.2 AD.MIG
## 15819 1083 2983 3021 2951 2936 1802 2818
## AD.CTL SIM.GFP SIM.MIG SIM.CTL
## 2552 1548 2959 2704
# tags q0.99
#cutoff.FB <- c(37,51,30,17,12,
# 20,13,24,21,20)
# tags1-6/8/9 set q0.996
#cutoff.FB <- c(42,59,34,20,13,
# 23,13,27,25,20)
# tags1-6/8/9 set q0.998
cutoff.FB <- c(46,64,38,22,15,
26,13,30,27,20)
names(cutoff.FB) <- rownames(FB.seur)
cutoff.FB
## EAE.GFP EAE.MIG EAE.CTL AD.GFP.1 AD.GFP.2 AD.MIG AD.CTL SIM.GFP
## 46 64 38 22 15 26 13 30
## SIM.MIG SIM.CTL
## 27 20
par(mfrow=c(2,5))
#plot(sort(rowSums(t(FB))),
# xlab="reordered cells",
# ylab="UMI counts", log="xy",
# main="sum")
plot(sort(t(FB)[,1]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[1])
abline(h=cutoff.FB[1],col = color.FB[1])
plot(sort(t(FB)[,2]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[2])
abline(h=cutoff.FB[2],col = color.FB[2])
plot(sort(t(FB)[,3]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[3])
abline(h=cutoff.FB[3],col = color.FB[3])
plot(sort(t(FB)[,4]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[4])
abline(h=cutoff.FB[4],col = color.FB[4])
plot(sort(t(FB)[,5]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[5])
abline(h=cutoff.FB[5],col = color.FB[5])
plot(sort(t(FB)[,6]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[6])
abline(h=cutoff.FB[6],col = color.FB[6])
plot(sort(t(FB)[,7]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[7])
abline(h=cutoff.FB[7],col = color.FB[7])
plot(sort(t(FB)[,8]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[8])
abline(h=cutoff.FB[8],col = color.FB[8])
plot(sort(t(FB)[,9]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[9])
abline(h=cutoff.FB[9],col = color.FB[9])
plot(sort(t(FB)[,10]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[10])
abline(h=cutoff.FB[10],col = color.FB[10])
custom.Demux <- function(FBobj,FBcutoff){
# define decision matrix
dd <- FBobj@assays[['HTO']]@counts
dd <- apply(dd, 2, function(x){
x > FBcutoff
})
x1 <- colSums(dd)
x1[x1>1] <- "Doublet"
x1[x1==0] <- "Negative"
x1[x1==1] <- "Singlet"
x2 <- x1
bc.slt <- names(x1)[x1=="Singlet"]
for(bc in bc.slt){
x2[bc] <- rownames(dd)[dd[,bc]]
}
FBdf <- data.frame(HTO_classification.global=factor(x1,
levels = c("Doublet","Negative","Singlet")),
hash.ID=factor(x2,
levels = c("Doublet","Negative",rownames(dd))))
return(FBdf)
}
df.FB <- custom.Demux(FB.seur,cutoff.FB)
dim(df.FB)
## [1] 43176 2
df.FB[1:10,]
## HTO_classification.global hash.ID
## AAACCCAAGAAGGATG-1 Singlet EAE.CTL
## AAACCCAAGAAGTCCG-1 Singlet SIM.MIG
## AAACCCAAGAGAAGGT-1 Singlet AD.GFP.1
## AAACCCAAGCACTCAT-1 Singlet AD.GFP.1
## AAACCCAAGTATGTAG-1 Doublet Doublet
## AAACCCAAGTCATCCA-1 Singlet SIM.CTL
## AAACCCAAGTCGGCAA-1 Doublet Doublet
## AAACCCAAGTCGGCCT-1 Singlet EAE.GFP
## AAACCCAAGTGTTGTC-1 Doublet Doublet
## AAACCCACAAACTAAG-1 Singlet SIM.MIG
table(df.FB)
## hash.ID
## HTO_classification.global Doublet Negative EAE.GFP EAE.MIG EAE.CTL AD.GFP.1
## Doublet 14706 0 0 0 0 0
## Negative 0 1280 0 0 0 0
## Singlet 0 0 3128 3134 3023 3052
## hash.ID
## HTO_classification.global AD.GFP.2 AD.MIG AD.CTL SIM.GFP SIM.MIG SIM.CTL
## Doublet 0 0 0 0 0 0
## Negative 0 0 0 0 0 0
## Singlet 1853 2901 2665 1584 3014 2836
FB.seur@meta.data[,c("HTO_classification.global","hash.ID")] <- df.FB
FB.seur@meta.data[1:4,]
## orig.ident nCount_RNA nFeature_RNA nCount_HTO nFeature_HTO
## AAACCCAAGAAGGATG-1 Spp1ND 392 10 392 10
## AAACCCAAGAAGTCCG-1 Spp1ND 123 10 123 10
## AAACCCAAGAGAAGGT-1 Spp1ND 296 10 296 10
## AAACCCAAGCACTCAT-1 Spp1ND 176 10 176 10
## HTO_maxID HTO_secondID HTO_margin HTO_classification
## AAACCCAAGAAGGATG-1 EAE.CTL SIM.CTL 2.267075 EAE.CTL
## AAACCCAAGAAGTCCG-1 SIM.MIG EAE.GFP 1.596241 SIM.MIG
## AAACCCAAGAGAAGGT-1 AD.GFP.1 SIM.CTL 2.543885 AD.GFP.1
## AAACCCAAGCACTCAT-1 AD.GFP.1 AD.MIG 2.033227 AD.GFP.1
## HTO_classification.global hash.ID
## AAACCCAAGAAGGATG-1 Singlet EAE.CTL
## AAACCCAAGAAGTCCG-1 Singlet SIM.MIG
## AAACCCAAGAGAAGGT-1 Singlet AD.GFP.1
## AAACCCAAGCACTCAT-1 Singlet AD.GFP.1
## if all using same q, then run this chunk instead of several custom ones above
#
#FB.seur$HTO_classification.global <- factor(as.character(FB.seur$HTO_classification.global),
# levels = c("Doublet","Negative","Singlet"))
#FB.seur$hash.ID <- factor(as.character(FB.seur$hash.ID),
# levels = c("Doublet","Negative",rownames(FB.seur)))
sl_stat <- table(FB.seur$HTO_classification.global)
barplot(sl_stat,ylim = c(0,32500),col = c("#FF6C91","lightgrey","#7CAE00"),main = "Feature Barcode statistics")
text(x=1:3*1.2-0.5,y=sl_stat+1680,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)
FB.seur@active.ident <- factor(FB.seur$HTO_maxID,levels=rownames(FB.seur))
RidgePlot(FB.seur, assay = 'HTO', features = rownames(FB.seur[['HTO']]),same.y.lims= TRUE, ncol=5,
cols = color.FB)
RidgePlot(FB.seur, assay = 'HTO', features = rownames(FB.seur[['HTO']]), ncol=5,same.y.lims= TRUE,
cols = c("#FF6C91","lightgrey",color.FB),group.by = "hash.ID")
# first, remove negative cells from th object
#FB.seur.subset <- FB.seur
# Calculate a tSNE embedding of the HTO data
#DefaultAssay(FB.seur.subset) <- "HTO"
#FB.seur.subset <- ScaleData(FB.seur.subset, features = rownames(FB.seur.subset), verbose = FALSE)
#FB.seur.subset <- RunPCA(FB.seur.subset, features = rownames(FB.seur.subset), approx = FALSE)
#FB.seur.subset <- RunTSNE(FB.seur.subset, dims = 1:10, perplexity = 500, check_duplicates = FALSE)
#saveRDS(FB.seur.subset, "FB0829.seur.subset.rds")
FB.seur.subset <- readRDS("FB0829.seur.subset.rds")
multiplot(
DimPlot(FB.seur.subset, order = rev(rownames(FB.seur)),
cols = color.FB) + labs(title = "FB Max_ID"),
DimPlot(FB.seur.subset, order = rev(c("Negative",rownames(FB.seur),"Doublet")),
group.by = 'hash.ID', label = F,
cols = c("lightgrey",color.FB,"#FF6C91")) +
labs(title = "FB Filtered_ID") + theme(plot.title = element_text(hjust = 0)) ,
cols = 1)
FB.seur$HTO_classification.global <- factor(as.character(FB.seur$HTO_classification.global),
levels = c("Doublet","Negative","Singlet"))
Idents(FB.seur) <- "HTO_classification.global"
VlnPlot(FB.seur, features = "nCount_RNA", pt.size = 0, log = TRUE, cols = c("#F8766D","lightgrey","#00BA38"))
VlnPlot(FB.seur, features = "nCount_RNA", pt.size = 0, log = TRUE, cols = c("#F8766D","lightgrey","#00BA38")) + geom_jitter(alpha=0.015)
table(FB.seur@meta.data$hash.ID)
##
## Doublet Negative EAE.GFP EAE.MIG EAE.CTL AD.GFP.1 AD.GFP.2 AD.MIG
## 14706 1280 3128 3134 3023 3052 1853 2901
## AD.CTL SIM.GFP SIM.MIG SIM.CTL
## 2665 1584 3014 2836
par(mar=c(6,4,2,3))
sl_stat <- table(FB.seur$hash.ID)
barplot(sl_stat,ylim = c(0,23800),
col = c("#FF6C91","lightgrey",color.FB),
main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:12*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID), las=3, cex.axis=0.85)
text(x=1:12*1.2-0.45,y=sl_stat+1575,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)
mainly follow https://satijalab.org/seurat/articles/pbmc3k_tutorial.html
GEX.seur <- CreateSeuratObject(counts = GEX,
min.cells = 3,
min.features = 200,
project = "Spp1ND")
GEX.seur
## An object of class Seurat
## 17590 features across 43175 samples within 1 assay
## Active assay: RNA (17590 features, 0 variable features)
grep("^mt-",rownames(GEX),value = T)
## [1] "mt-Nd1" "mt-Nd2" "mt-Co1" "mt-Co2" "mt-Atp8" "mt-Atp6" "mt-Co3"
## [8] "mt-Nd3" "mt-Nd4l" "mt-Nd4" "mt-Nd5" "mt-Nd6" "mt-Cytb"
MT_gene <- grep("^mt-",rownames(GEX.seur),value = T)
GEX.seur[["percent.mt"]] <- PercentageFeatureSet(GEX.seur, features = MT_gene)
RB_gene <- grep("^Rps|^Rpl",rownames(GEX.seur),value = T)
GEX.seur[["percent.rb"]] <- PercentageFeatureSet(GEX.seur, features = RB_gene)
# Visualize QC metrics as a violin plot
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.01)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)
plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt")
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb")
plota + plotb + plotc
GEX.seur$FB.info <- FB.seur@meta.data[rownames(GEX.seur@meta.data),"hash.ID"]
#head(GEX.seur@meta.data)
table(GEX.seur$FB.info)
##
## Doublet Negative EAE.GFP EAE.MIG EAE.CTL AD.GFP.1 AD.GFP.2 AD.MIG
## 14706 1280 3127 3134 3023 3052 1853 2901
## AD.CTL SIM.GFP SIM.MIG SIM.CTL
## 2665 1584 3014 2836
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
cols = c("#FF6C91","lightgrey",color.FB),
pt.size = 0.1, split.by = "FB.info")
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##
## This message will be shown once per session.
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
cols = c("#FF6C91","lightgrey",color.FB),
pt.size = 0, split.by = "FB.info")
GEX.seur <- subset(GEX.seur, subset = FB.info!="Doublet" & FB.info!="Negative")
GEX.seur
## An object of class Seurat
## 17590 features across 27189 samples within 1 assay
## Active assay: RNA (17590 features, 0 variable features)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.1)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
cols = c("#FF6C91","lightgrey",color.FB),
pt.size = 0.1, split.by = "FB.info")
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
cols = c("#FF6C91","lightgrey",color.FB),
pt.size = 0, split.by = "FB.info")
plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt")
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb")
plota + plotb + plotc
GEX.seur <- subset(GEX.seur, subset = percent.mt < 10 & nFeature_RNA > 650 & nFeature_RNA < 3000 & nCount_RNA < 10000)
GEX.seur
## An object of class Seurat
## 17590 features across 24766 samples within 1 assay
## Active assay: RNA (17590 features, 0 variable features)
filtered obj
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.1)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
cols = color.FBraw,
pt.size = 0.1, split.by = "FB.info")
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
cols = color.FBraw,
pt.size = 0, split.by = "FB.info")
plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt")
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb")
plota + plotb + plotc
par(mar=c(6,4,2,3))
sl_stat <- table(FB.seur$hash.ID)
barplot(sl_stat,ylim = c(0,18800),
col = c("#FF6C91","lightgrey",color.FB),
main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:12*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID), las=3, cex.axis=0.85)
text(x=1:12*1.2-0.45,y=sl_stat+1375,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)
par(mar=c(6,4,2,3))
sl_stat <- table(GEX.seur$FB.info)
barplot(sl_stat,ylim = c(0,4200),
col = c("#FF6C91","lightgrey",color.FB),
main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:12*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID), las=3, cex.axis=0.85)
text(x=1:12*1.2-0.45,y=sl_stat+275,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)
s.genes <- Hmisc::capitalize(tolower(cc.genes.updated.2019$s.genes))
g2m.genes <- Hmisc::capitalize(tolower(cc.genes.updated.2019$g2m.genes))
GEX.seur <- CellCycleScoring(GEX.seur,
s.features = s.genes,
g2m.features = g2m.genes)
VlnPlot(GEX.seur, features = c("S.Score", "G2M.Score"), group.by = "FB.info",
ncol = 2, pt.size = 0.1)
GEX.seur.cc <- GEX.seur
GEX.seur.cc <- NormalizeData(GEX.seur.cc)
GEX.seur.cc <- FindVariableFeatures(GEX.seur.cc)
GEX.seur.cc <- ScaleData(GEX.seur.cc)
Idents(GEX.seur.cc) <- "Phase"
RidgePlot(GEX.seur.cc, features = c("Pcna", "Top2a", "Mcm6", "Mki67"), ncol = 2)
GEX.seur.cc <- RunPCA(GEX.seur.cc, features = c(s.genes, g2m.genes))
DimPlot(GEX.seur.cc, reduction = 'pca')
GEX.seur <- NormalizeData(GEX.seur, normalization.method = "LogNormalize", scale.factor = 10000)
GEX.seur <- FindVariableFeatures(GEX.seur, selection.method = "vst", nfeatures = 1500)
# Identify the 10 most highly variable genes
#top10 <- head(VariableFeatures(GEX.seur), 10)
top20 <- head(VariableFeatures(GEX.seur), 20)
# plot variable features with and without labels
plot1 <- VariableFeaturePlot(GEX.seur)
plot2 <- LabelPoints(plot = plot1, points = top20, repel = T)
plot1 + plot2
head(VariableFeatures(GEX.seur), 200)
## [1] "Cxcl10" "Spp1" "Ccl4" "Cxcl13"
## [5] "Ccl5" "Cd74" "H2-Aa" "Cxcl9"
## [9] "H2-Eb1" "Ccl3" "Spp1-EGFP" "Mmp12"
## [13] "Ccl12" "H2-Ab1" "Pf4" "Mrc1"
## [17] "Gpnmb" "S100a6" "Ccl2" "Rsad2"
## [21] "Fn1" "Cck" "Top2a" "Cst7"
## [25] "Ttr" "Mki67" "Lgals3" "Lpl"
## [29] "Iigp1" "Fabp5" "Il1b" "Ccl6"
## [33] "Lyve1" "Mgl2" "Apoc1" "Ifi27l2a"
## [37] "Cd209f" "Egr1" "Saa3" "Gdf15"
## [41] "Ccr7" "S100a4" "Il1rn" "Ifitm3"
## [45] "Ccl22" "Cxcl2" "Wfdc17" "Ifit3"
## [49] "Napsa" "Apoc4" "Ptgds" "Prc1"
## [53] "Ube2c" "Plac8" "Ly6c2" "Egr3"
## [57] "Ms4a7" "Ch25h" "AA467197" "Cd163"
## [61] "Igfbp5" "F13a1" "Apoe" "Stmn1"
## [65] "Cd209a" "Pclaf" "Ifnb1" "Ifit2"
## [69] "Hbb-bs" "Lyz2" "Snca" "Hist1h1b"
## [73] "Serpinb6b" "Cenpf" "Ifit1" "Ccl7"
## [77] "Tnf" "Hba-a2" "Hba-a1" "Hbb-bt"
## [81] "Igf1" "Alas2" "Ccl8" "Cd5l"
## [85] "Gm1673" "Cd200" "Ank" "Ccl17"
## [89] "Nusap1" "Cd209g" "Ly6k" "Dqx1"
## [93] "Cdkn1a" "Isg15" "Clu" "Ifi203"
## [97] "Hmmr" "Aldh1a3" "Gm45184" "Crip1"
## [101] "Cox6a2" "Gm26885" "A330032B11Rik" "Cd209b"
## [105] "Ms4a4b" "Clec4d" "Gm43409" "Slc7a11"
## [109] "Wdcp" "Vim" "Iglc2" "Clec10a"
## [113] "Itgax" "Ramp3" "Cd52" "Birc5"
## [117] "Clec4e" "Glipr2" "Ahnak" "Csf1"
## [121] "Cybb" "Enpp2" "Ifi205" "Cenpa"
## [125] "Cd72" "Nr4a1" "Ctla2a" "Plbd1"
## [129] "Postn" "Syngr1" "Gda" "Lgals1"
## [133] "Gm49756" "Emb" "Ly6a" "B230312C02Rik"
## [137] "Atf3" "Gbp2" "Rnd3" "Olfr1369-ps1"
## [141] "Gnas" "Cbr2" "Ctsd" "Ifitm6"
## [145] "Il2rb" "Prdx1" "Gm26737" "Nkg7"
## [149] "Dennd5b" "Stat4" "Adgre5" "C4b"
## [153] "Kit" "Bst2" "Plp1" "Tnfrsf18"
## [157] "Tyrobp" "Serpinb1a" "Serpina3g" "Spn"
## [161] "Trbc2" "Ramp1" "Lilrb4a" "Cd69"
## [165] "Mif" "Edn1" "Ctsb" "Cd38"
## [169] "Pbld1" "Skap1" "Aplp2" "Tcap"
## [173] "S100a11" "Cd300e" "Krt80" "Trbc1"
## [177] "Mndal" "Clec4b1" "Chac1" "Neil3"
## [181] "B930036N10Rik" "Ifit3b" "Il4i1" "Slfn5"
## [185] "Folr1" "Traf1" "Ccnb2" "Ifi204"
## [189] "Ccl9" "Ifitm2" "Ccnd2" "Slco3a1"
## [193] "Cdca8" "Pbk" "Hif1a" "Ccr1"
## [197] "Acod1" "Lgals3bp" "Gm29773" "Ifi207"
GEX.seur <- ScaleData(GEX.seur, features = rownames(GEX.seur))
## Centering and scaling data matrix
# exclude MT genes and more
#DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^Gm|^Hist",rownames(GEX.seur),value = T)
DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|Mzb1|Vpreb|Lars2|Jun|Fos|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^AC|^AA|^AW|^BC|^Gm|^Hist|Rik$|-ps",
rownames(GEX.seur),value = T)
CC_gene <- Hmisc::capitalize(tolower(as.vector(unlist(cc.genes.updated.2019))))
VariableFeatures(GEX.seur) <- setdiff(VariableFeatures(object = GEX.seur),
c(MT_gene,
DIG,
CC_gene) )
GEX.seur <- RunPCA(GEX.seur, features = VariableFeatures(GEX.seur))
## PC_ 1
## Positive: Crybb1, Ecscr, Hpgd, Lrba, Slc2a5, Camk2n1, Stab1, Lst1, Filip1l, Slc40a1
## Fcrls, Il7r, Sox4, Ddit4, Bank1, Tent5a, Serpinf1, Btg2, Fam102b, Klf7
## Nav3, Gbp7, Klk8, Upk1b, Cask, Hlf, Fry, Tsix, Nfkbia, Tmem52
## Negative: Cst7, Apoe, Ctsd, Fabp5, Ctsb, Tyrobp, Fth1, Lpl, Igf1, Lyz2
## Cd63, Ctsz, Cd52, Ank, Mif, Gnas, Ftl1, Csf1, B2m, Npc2
## Itgax, Cd9, Ccl3, Hif1a, Cox6a2, Eef1a1, Lgals3bp, Trem2, Aplp2, Syngr1
## PC_ 2
## Positive: Trem2, Ctsd, Ank, Tyrobp, Ccl6, Ramp1, Igf1, Serpine2, Baiap2l2, Itgax
## Hexa, Mamdc2, Syngr1, Cd68, Hpgd, Dkk2, Cst7, Cadm1, Cd9, St8sia6
## Fabp5, Creg1, Scd2, Aplp2, Lpl, Lgi2, Arap2, Slc12a2, Hif1a, Crip1
## Negative: Ifit3, Isg15, Iigp1, Ccl12, Ifit2, Ifitm3, Ifi204, Ifit1, Rsad2, Ifi211
## Irf7, Ccl2, Slfn5, Ifi207, Ifi206, Zbp1, Oasl2, Ifit3b, Oasl1, Stat1
## Slfn2, Ifi209, Rtp4, Fgl2, Usp18, Ifi213, Fcgr4, Gbp2, Bst2, Cxcl10
## PC_ 3
## Positive: Trem2, Ctsd, Tyrobp, Ifit3, Ifit2, Serpine2, Hexa, Ifit1, Ifit3b, Oasl2
## Ramp1, Ctsl, Slfn5, Cst7, Baiap2l2, Rsad2, Rtp4, Isg15, Syngr1, Grn
## Ly86, Usp18, Lgi2, B2m, Ifi206, Timp2, Fau, Zfas1, Irf7, Ifi27l2a
## Negative: Lgals3, Fn1, Lilrb4a, Vim, Pclaf, Id2, Ccl4, Cd72, Bcl2a1d, Lilr4b
## Iqgap1, Emp3, Atf3, Cd36, Nes, Stmn1, Tpm4, Spp1-EGFP, Glipr1, Prc1
## Spc24, Lgals1, Fxyd5, Spp1, Cdkn1a, Lpl, Tagln2, Cybb, Mmp12, Actg1
## PC_ 4
## Positive: Stmn1, Fcrls, Fam102b, Pclaf, Serpine2, Slc2a5, Crybb1, Hpgd, Prc1, Spc24
## Grn, Ecscr, Lrba, Calr, Pbk, Tk1, Sox4, Cdk6, Esco2, Ifit2
## Lockd, Knl1, Dek, Scd2, Fkbp2, Smc2, Ccna2, Psat1, Ezh2, Rsad2
## Negative: Apoe, H2-D1, Fth1, Cd74, Fau, H2-K1, H2-Aa, H2-Ab1, Lyz2, H2-Eb1
## H2-Q7, B2m, C3, C4b, Eef1a1, Apoc1, Cd52, Ms4a7, Eef1b2, Cybb
## Ftl1, Ccl5, Gas5, Cd63, H2-Q6, Arhgap15, Pla2g7, Slamf7, Ifi30, Apoc2
## PC_ 5
## Positive: Pclaf, Ly86, Prc1, Stmn1, Spc24, B2m, Pbk, Tk1, Fau, Esco2
## Knl1, Lockd, Racgap1, Ccna2, Mis18bp1, Cd52, H2-D1, Ifi27l2a, Ccnf, Smc2
## Kif15, Spc25, Ezh2, Asf1b, H2-Q7, Knstrn, Trim59, Dut, H2-K1, Tspo
## Negative: Lilrb4a, Lgals3, Ccl4, Atf3, Rsad2, Ifit1, Ifit3, Ccl3, Ifit3b, Plau
## Ms4a7, Slc15a3, Plaur, Ifit2, Slfn5, Ifi207, Rgs1, Fam20c, Fcrls, Lilr4b
## Dab2, Iqgap1, Gpnmb, Csf1, Igf1, Rab7b, Ch25h, Stab1, Lpl, Id2
length(VariableFeatures(GEX.seur))
## [1] 1199
head(VariableFeatures(GEX.seur),300)
## [1] "Cxcl10" "Spp1" "Ccl4" "Cxcl13" "Ccl5" "Cd74"
## [7] "H2-Aa" "Cxcl9" "H2-Eb1" "Ccl3" "Spp1-EGFP" "Mmp12"
## [13] "Ccl12" "H2-Ab1" "Pf4" "Mrc1" "Gpnmb" "S100a6"
## [19] "Ccl2" "Rsad2" "Fn1" "Cck" "Cst7" "Ttr"
## [25] "Lgals3" "Lpl" "Iigp1" "Fabp5" "Il1b" "Ccl6"
## [31] "Lyve1" "Mgl2" "Apoc1" "Ifi27l2a" "Cd209f" "Egr1"
## [37] "Saa3" "Gdf15" "Ccr7" "S100a4" "Il1rn" "Ifitm3"
## [43] "Ccl22" "Cxcl2" "Wfdc17" "Ifit3" "Napsa" "Apoc4"
## [49] "Ptgds" "Prc1" "Plac8" "Ly6c2" "Egr3" "Ms4a7"
## [55] "Ch25h" "Cd163" "Igfbp5" "F13a1" "Apoe" "Stmn1"
## [61] "Cd209a" "Pclaf" "Ifnb1" "Ifit2" "Lyz2" "Snca"
## [67] "Serpinb6b" "Ifit1" "Ccl7" "Tnf" "Igf1" "Alas2"
## [73] "Ccl8" "Cd5l" "Cd200" "Ank" "Ccl17" "Cd209g"
## [79] "Ly6k" "Dqx1" "Cdkn1a" "Isg15" "Clu" "Ifi203"
## [85] "Aldh1a3" "Crip1" "Cox6a2" "Cd209b" "Ms4a4b" "Clec4d"
## [91] "Slc7a11" "Wdcp" "Vim" "Clec10a" "Itgax" "Ramp3"
## [97] "Cd52" "Clec4e" "Glipr2" "Ahnak" "Csf1" "Cybb"
## [103] "Enpp2" "Ifi205" "Cd72" "Nr4a1" "Ctla2a" "Plbd1"
## [109] "Postn" "Syngr1" "Gda" "Lgals1" "Emb" "Ly6a"
## [115] "Atf3" "Gbp2" "Rnd3" "Gnas" "Cbr2" "Ctsd"
## [121] "Ifitm6" "Il2rb" "Prdx1" "Nkg7" "Dennd5b" "Stat4"
## [127] "Adgre5" "C4b" "Kit" "Bst2" "Plp1" "Tnfrsf18"
## [133] "Tyrobp" "Serpinb1a" "Serpina3g" "Spn" "Ramp1" "Lilrb4a"
## [139] "Cd69" "Mif" "Edn1" "Ctsb" "Cd38" "Pbld1"
## [145] "Skap1" "Aplp2" "Tcap" "S100a11" "Cd300e" "Krt80"
## [151] "Mndal" "Clec4b1" "Chac1" "Neil3" "Ifit3b" "Il4i1"
## [157] "Slfn5" "Folr1" "Ifi204" "Ccl9" "Ifitm2" "Ccnd2"
## [163] "Slco3a1" "Pbk" "Hif1a" "Ccr1" "Acod1" "Lgals3bp"
## [169] "Ifi207" "Cd3d" "Ifi44" "Ifi211" "Flt1" "Olr1"
## [175] "Serpine2" "Cd93" "Fth1" "Cd63" "Il2ra" "Sprr1a"
## [181] "Cp" "Fxyd5" "Nudt17" "Trib3" "Ccnb1" "Ecrg4"
## [187] "Knl1" "Pou3f1" "Fxyd1" "Fcgr4" "Mamdc2" "Ftl1"
## [193] "Ly75" "Lyz1" "Serpinb8" "Id2" "Ctsz" "Oasl1"
## [199] "Colec12" "Clec12a" "Cxcl14" "Lockd" "Ccr2" "Sirpb1c"
## [205] "Enah" "Gadd45b" "Phlda3" "Irf7" "Cd36" "Hp"
## [211] "Dkk2" "Serpinh1" "Bex1" "Klrd1" "Esco2" "Ms4a4c"
## [217] "Ifitm1" "Baiap2l2" "Cacnb3" "Ntrk2" "Klra7" "Rhox5"
## [223] "Ctsl" "Pglyrp1" "Ctsw" "Lgi2" "Nedd4" "Capg"
## [229] "Xcl1" "Oasl2" "Adgrg6" "Rgs1" "Trem2" "Rab3c"
## [235] "Tnip3" "Cstb" "Timd4" "Gapdh" "Ccdc63" "Gpr141"
## [241] "Ear2" "Gria2" "H2-Q7" "Aspm" "Calca" "Kcnk9"
## [247] "Rep15" "Cspg4" "Scd2" "Dab2" "Trpm3" "Mt1"
## [253] "Cd9" "Vcam1" "Ccna2" "Bcl2l14" "Slamf7" "Gimap3"
## [259] "Il12b" "Gdf3" "Kcnq1ot1" "Shcbp1" "Grb14" "Crlf2"
## [265] "Pkm" "Spc24" "Net1" "Nes" "Axl" "Spag5"
## [271] "Cfp" "Igfbp2" "Adamts1" "Timp2" "Pld3" "Spint2"
## [277] "Atp6v0d2" "Gpr84" "S100a10" "Loxl2" "Hbegf" "Rcan1"
## [283] "Cd7" "Tnfrsf11b" "Bcl2a1d" "Zbp1" "Olfr889" "Ptgs2"
## [289] "Fabp3" "Tk1" "Fabp4" "Tlr2" "Pcp4" "Ifi208"
## [295] "Egfl7" "Klrk1" "Tspo" "Sag" "Pcgf2" "Usp18"
DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "orig.ident") +
DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "orig.ident")
DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "FB.info", cols = color.FB) +
DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "FB.info", cols = color.FB)
DimHeatmap(GEX.seur, dims = 1:12, cells = 1500, balanced = TRUE,ncol = 4)
ElbowPlot(GEX.seur,ndims = 50)
PCs <- 1:25
GEX.seur <- FindNeighbors(GEX.seur, dims = PCs, k.param = 10)
## Computing nearest neighbor graph
## Computing SNN
GEX.seur <- FindClusters(GEX.seur, method = 'igraph' ,resolution = 1.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 24766
## Number of edges: 285496
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7557
## Number of communities: 27
## Elapsed time: 2 seconds
## 3 singletons identified. 24 final clusters.
GEX.seur <- RunTSNE(GEX.seur, dims=PCs, complexity = 100)
GEX.seur <- RunUMAP(GEX.seur, dims=PCs, n.neighbors = 50, seed.use = 666)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 17:09:21 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:09:21 Read 24766 rows and found 25 numeric columns
## 17:09:21 Using Annoy for neighbor search, n_neighbors = 50
## 17:09:21 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 17:09:24 Writing NN index file to temp file C:\Users\Shaorui\AppData\Local\Temp\RtmpS6e24x\filecab8362635c
## 17:09:24 Searching Annoy index using 1 thread, search_k = 5000
## 17:09:37 Annoy recall = 100%
## 17:09:38 Commencing smooth kNN distance calibration using 1 thread
## 17:09:40 Initializing from normalized Laplacian + noise
## 17:09:42 Commencing optimization for 200 epochs, with 1898574 positive edges
## 17:10:15 Optimization finished
DimPlot(GEX.seur, reduction = "tsne", label = T, group.by = "seurat_clusters") +
DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "seurat_clusters")
FeaturePlot(GEX.seur, reduction = "umap", features = c("nFeature_RNA","nCount_RNA","percent.mt","percent.rb"))
DimPlot(GEX.seur, label = F, pt.size = 0.05, repel = F, reduction = 'tsne', group.by = "FB.info", split.by = "FB.info",
ncol = 5, cols = color.FB)
DimPlot(GEX.seur, label = F, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "FB.info", split.by = "FB.info",
ncol = 5, cols = color.FB)
DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "seurat_clusters") +
DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "FB.info", cols = color.FB)
markers.mig <- c("Ptprc",#"Cd3d","Cd3e","Cd19",
"Cd74","Lyz2","Ccl4",
"Aif1","P2ry12","C1qa","Spp1",
"Top2a","Pcna","Mki67","Mcm6",
"Cx3cr1","Il4ra","Il13ra1","Spp1-EGFP",
"Fabp5","Hmox1","Ms4a7","Cenpa")
FeaturePlot(GEX.seur,
features = markers.mig,
ncol = 4)
DotPlot(GEX.seur, features = markers.mig, group.by = "FB.info",
cols = c("midnightblue","darkorange1")) +
theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))+ scale_y_discrete(limits=rev)
DotPlot(GEX.seur, features = markers.mig, group.by = "seurat_clusters",
cols = c("midnightblue","darkorange1")) +
theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))+ scale_y_discrete(limits=rev)
cowplot::plot_grid(
pheatmap::pheatmap(table(FB.info=GEX.seur$FB.info,
clusters=GEX.seur$seurat_clusters)[c(3:12),],
main = "Cell Count",
gaps_row = c(3,7),
#gaps_col = c(12,13,14),
cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f", legend = F, silent = T)$gtable,
pheatmap::pheatmap(100*rowRatio(table(FB.info=GEX.seur$FB.info,
clusters=GEX.seur$seurat_clusters)[c(3:12),]),
main = "Cell Ratio",
gaps_row = c(3,7),
#gaps_col = c(12,13,14),
cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f", legend = F, silent =T)$gtable,
ncol = 1)
GEX.seur$sort_clusters <- factor(GEX.seur$seurat_clusters,
levels = c(1,3,0,2,10,9,
19,21,
4,16,13,11,20,
5,12,14,17,
18,6,8,7,15,22,23))
cowplot::plot_grid(
pheatmap::pheatmap(table(FB.info=GEX.seur$FB.info,
clusters=GEX.seur$sort_clusters)[c(3:12),],
main = "Cell Count",
gaps_row = c(3,7),
#gaps_col = c(12,13,14),
cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f", legend = F, silent = T)$gtable,
pheatmap::pheatmap(100*rowRatio(table(FB.info=GEX.seur$FB.info,
clusters=GEX.seur$sort_clusters)[c(3:12),]),
main = "Cell Ratio",
gaps_row = c(3,7),
#gaps_col = c(12,13,14),
cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f", legend = F, silent =T)$gtable,
ncol = 1)
DotPlot(GEX.seur, features = markers.mig, group.by = "sort_clusters",
cols = c("midnightblue","darkorange1")) +
theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))+ scale_y_discrete(limits=rev)
check.Cell2017 <- c("Hexb","Cst3","P2ry12","Cx3cr1",
"Cd9","Ctsd","Cst7","Lpl",
"Mrc1","Cd163","S100a4","Cd74",
"Cd79b","Rag1","Trbc2","Nkg7",
"S100a9","Camp"
)
DotPlot(GEX.seur, features = check.Cell2017, group.by = "sort_clusters",
cols = c("midnightblue","darkorange1")) +
theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))#+ scale_y_discrete(limits=rev)
## Warning in FetchData(object = object, vars = features, cells = cells): The
## following requested variables were not found: Rag1, Camp
markers.Sci2019 <- c("Tmem119","Hexb","Slc2a5","P2ry12","Siglech","Trem2","Sparc","Olfml3", # Microglia
"Mrc1","Lyve1","Cd163","Siglec1","Pf4","Ms4a7","Cbr2", # CAMs
"Ly6c2","Ccr2","Plac8","Anxa8","Nr4a1","Mertk","Zbtb26","Cd209a", # Monocyte-derived
"Flt3","Zbtb46","Batf3","Itgae","Clec9a", # DCs
"Tubb3" # local added
)
DotPlot(GEX.seur, features = markers.Sci2019, group.by = "sort_clusters",
cols = c("midnightblue","darkorange1")) +
theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))#+ scale_y_discrete(limits=rev)
## Warning in FetchData(object = object, vars = features, cells = cells): The
## following requested variables were not found: Anxa8
c("Xist","Tsix","Uty","Ddx3y","Eif2s3y","Kdm5d") %in% VariableFeatures(GEX.seur)
## [1] FALSE TRUE FALSE TRUE TRUE FALSE
xy.genes <- c("Xist","Tsix","Uty","Ddx3y","Eif2s3y","Kdm5d")
DotPlot(GEX.seur, features = xy.genes, group.by = "sort_clusters",
cols = c("midnightblue","darkorange1")) +
theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))#+ scale_y_discrete(limits=rev)
xy.genes <- c("Xist","Tsix","Uty","Ddx3y","Eif2s3y","Kdm5d")
DotPlot(GEX.seur, features = xy.genes, group.by = "FB.info",
cols = c("midnightblue","darkorange1")) +
theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))#+ scale_y_discrete(limits=rev)
# find markers for every cluster compared to all remaining cells, report only the positive ones
Idents(GEX.seur) <- "sort_clusters"
#GEX.markers.pre <- FindAllMarkers(GEX.seur, only.pos = TRUE, min.pct = 0.05,
# test.use = "MAST",
# #test.use = "wilcox",
# logfc.threshold = 0.25)
GEX.markers.pre <- read.table("sc10x_LYN.marker.sort0829.csv", header = TRUE, sep = ",")
GEX.markers.pre %>% group_by(cluster) %>% top_n(n = 8, wt = avg_log2FC)
## # A tibble: 190 x 7
## # Groups: cluster [24]
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## <dbl> <dbl> <dbl> <dbl> <dbl> <int> <chr>
## 1 0 1.14 0.76 0.354 0 1 Crybb1
## 2 0 1.06 0.998 0.916 0 1 P2ry12
## 3 0 0.958 0.723 0.341 0 1 Ecscr
## 4 0 0.934 0.995 0.847 0 1 Fcrls
## 5 0 0.932 0.85 0.565 0 1 Cd164
## 6 0 0.877 0.946 0.754 0 1 Maf
## 7 3.69e-264 0.912 0.674 0.349 6.50e-260 1 Slc2a5
## 8 2.21e-209 0.884 0.539 0.25 3.88e-205 1 Sox4
## 9 2.22e-290 0.752 0.997 0.918 3.91e-286 3 P2ry12
## 10 2.98e-212 0.713 0.976 0.853 5.24e-208 3 Fcrls
## # ... with 180 more rows
GEX.markers.pre$cluster <- factor(as.character(GEX.markers.pre$cluster),
levels = levels(GEX.seur$sort_clusters))
markers.pre_t32 <- (GEX.markers.pre %>% group_by(cluster) %>%
filter(pct.1>0.1) %>%
top_n(n = 32, wt = avg_log2FC) %>%
ungroup() %>%
arrange(desc(avg_log2FC*pct.1),gene) %>%
distinct(gene, .keep_all = TRUE) %>%
arrange(cluster,p_val_adj))$gene
markers.pre_t48 <- (GEX.markers.pre %>% group_by(cluster) %>%
filter(pct.1>0.05 & gene %in% grep("Rps|Rpl|mt-",GEX.markers.pre$gene,invert = T,value = T)) %>%
top_n(n = 48, wt = avg_log2FC) %>%
ungroup() %>%
arrange(desc(avg_log2FC*pct.1),gene) %>%
distinct(gene, .keep_all = TRUE) %>%
arrange(cluster,p_val_adj))$gene
markers.pre_t60 <- (GEX.markers.pre %>% group_by(cluster) %>%
filter(pct.1>0.01 & gene %in% grep("Rps|Rpl|mt-",GEX.markers.pre$gene,invert = T,value = T)) %>%
top_n(n = 60, wt = avg_log2FC) %>%
filter(p_val_adj < 0.01) %>%
ungroup() %>%
arrange(desc(avg_log2FC*pct.1),gene) %>%
distinct(gene, .keep_all = TRUE) %>%
arrange(cluster,p_val_adj))$gene
DotPlot(GEX.seur, features = rev(markers.pre_t60[1:64])) + coord_flip() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
DotPlot(GEX.seur, features = rev(markers.pre_t60[65:128])) + coord_flip() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
DotPlot(GEX.seur, features = rev(markers.pre_t60[129:192])) + coord_flip() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
DotPlot(GEX.seur, features = rev(markers.pre_t60[193:256])) + coord_flip() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
DotPlot(GEX.seur, features = rev(markers.pre_t60[257:320])) + coord_flip() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
DotPlot(GEX.seur, features = rev(markers.pre_t60[321:384])) + coord_flip() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
DotPlot(GEX.seur, features = rev(markers.pre_t60[385:448])) + coord_flip() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
DotPlot(GEX.seur, features = rev(markers.pre_t60[449:512])) + coord_flip() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
DotPlot(GEX.seur, features = rev(markers.pre_t60[513:576])) + coord_flip() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
DotPlot(GEX.seur, features = rev(markers.pre_t60[577:640])) + coord_flip() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
DotPlot(GEX.seur, features = rev(markers.pre_t60[641:689])) + coord_flip() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
library(DoubletFinder)
## Loading required package: KernSmooth
## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009
## Loading required package: ROCR
## NULL
## [1] "Creating 8255 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
## [1] "Creating 8255 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
ncol = 2,
pt.size = 0.05, group.by = "sort_clusters")
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
ncol = 2,
pt.size = 0, group.by = "sort_clusters")
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
ncol = 2,
pt.size = 0.05, group.by = "sort_clusters", split.by = "DoubletFinder0.05")
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
ncol = 2,
pt.size = 0.05, group.by = "sort_clusters", split.by = "DoubletFinder0.1")
table(GEX.seur@meta.data[,c("sort_clusters","DoubletFinder0.05")])
## DoubletFinder0.05
## sort_clusters Doublet Singlet
## 1 217 2336
## 3 43 1782
## 0 1 2620
## 2 44 2349
## 10 74 1051
## 9 72 1074
## 19 41 173
## 21 0 204
## 4 76 1481
## 16 29 316
## 13 41 715
## 11 103 1013
## 20 19 194
## 5 15 1505
## 12 61 1045
## 14 38 514
## 17 22 315
## 18 1 244
## 6 102 1385
## 8 131 1290
## 7 65 1407
## 15 32 391
## 22 11 92
## 23 0 32
table(GEX.seur@meta.data[,c("sort_clusters","DoubletFinder0.1")])
## DoubletFinder0.1
## sort_clusters Doublet Singlet
## 1 282 2271
## 3 74 1751
## 0 8 2613
## 2 75 2318
## 10 99 1026
## 9 127 1019
## 19 64 150
## 21 1 203
## 4 123 1434
## 16 65 280
## 13 97 659
## 11 225 891
## 20 154 59
## 5 27 1493
## 12 102 1004
## 14 115 437
## 17 37 300
## 18 1 244
## 6 200 1287
## 8 314 1107
## 7 163 1309
## 15 68 355
## 22 53 50
## 23 3 29
# preAnno
GEX.seur$preAnno <- as.character(GEX.seur$seurat_clusters)
GEX.seur$preAnno[GEX.seur$preAnno %in% setdiff(levels(GEX.seur$seurat_clusters), c(21,23,22,15))] <- "Microglia"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(21)] <- "MIG.lowUMI"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(23)] <- "DC"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(22)] <- "BAM"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(15)] <- "Mono"
GEX.seur$preAnno <- factor(GEX.seur$preAnno,
levels = c("Microglia",
"MIG.lowUMI","DC","BAM","Mono"))
# cnt
# EAE - SIM - AD
# CTL - MIG - GFP
GEX.seur$cnt <- factor(gsub(".1|.2","",as.character(GEX.seur$FB.info)),
levels = c("EAE.CTL","EAE.MIG","EAE.GFP",
"SIM.CTL","SIM.MIG","SIM.GFP",
"AD.CTL","AD.MIG","AD.GFP"))
color.cnt <- color.FB[c(3,2,1,
10,9,8,
7,6,4)]
DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "preAnno") +
DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)
cowplot::plot_grid(
pheatmap::pheatmap(table(FB.info=GEX.seur$cnt,
clusters=GEX.seur$preAnno),
main = "Cell Count",
gaps_row = c(3,6),
#gaps_col = c(12,13,14),
cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f", legend = F, silent = T)$gtable,
pheatmap::pheatmap(100*rowRatio(table(FB.info=GEX.seur$cnt,
clusters=GEX.seur$preAnno)),
main = "Cell Ratio",
gaps_row = c(3,6),
#gaps_col = c(12,13,14),
cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f", legend = F, silent =T)$gtable,
ncol = 2)
#saveRDS(GEX.seur, "sc10x_LYN.marker.sort0829.rds")